In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import os
In [2]:
# Load the pickled Gen2 phenotype table from the user's home directory.
phenotypes_path = os.path.expanduser(
    '~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen2+phenotypes.p'
)
Gen2_phenotypes = pd.read_pickle(phenotypes_path)

# Preview the five rows with the highest 'test_accuracy'.
by_accuracy_desc = Gen2_phenotypes.sort_values(by='test_accuracy', ascending=False)
by_accuracy_desc.head(5)
Out[2]:
In [3]:
# Build the misclassification table for the best model: after a descending
# sort on 'test_accuracy', the first row is the highest-accuracy one.
N_CLASSES = 46  # class labels are taken to be the integers 0..45
best_row = Gen2_phenotypes.sort_values('test_accuracy', ascending=False).iloc[0]
misclassified_df = pd.DataFrame.from_dict(best_row['misclassed'])

# Count every (predicted, true) pair in one pass instead of re-filtering the
# frame once per pair. Pairs that never occur are filled with 0, and labels
# outside 0..N_CLASSES-1 are dropped by the reindex.
class_labels = range(N_CLASSES)
misclass_heat_df = pd.crosstab(
    misclassified_df['pred_class'], misclassified_df['true_class']
).reindex(index=class_labels, columns=class_labels, fill_value=0)

# column headers refer to true classes (string labels '0'..'45')
# row indices refer to predicted classes (0..45)
col_order = [str(label) for label in class_labels]
misclass_heat_df.index = pd.RangeIndex(N_CLASSES)
misclass_heat_df.columns = col_order

# Same counts as {true class label: [count per predicted class]}; kept so any
# later cell that reads mistakes_dict keeps working.
mistakes_dict = misclass_heat_df.to_dict(orient='list')

# Make a heatmap of misclassified classes.
sns.set(font_scale=3.0)
# Draw on an explicitly created figure so this plot never lands on axes left
# over from an earlier plot. The figure is resized only after drawing, exactly
# as before, so the tick layout chosen by seaborn is unchanged.
fig, ax = plt.subplots()
sns.heatmap(misclass_heat_df, linewidths=.1, ax=ax)

# True classes are labelled along the top edge, predicted classes on the left.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
ax.set_xlabel('True Classes', labelpad=20)
ax.xaxis.label.set_fontsize(28)
ax.set_ylabel('Predicted Classes', labelpad=10)
ax.yaxis.label.set_fontsize(28)

txt = '''Misclassification Counts'''
fig.set_size_inches(20, 15)
fig.text(0.3, .1, txt)  # caption placed in figure coordinates

# Shrink the tick labels; font_scale=3.0 makes them too large for 46 classes.
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontsize(12)

fig.savefig('Gen2_best_accuracy_misclass_heatmap.png')
In [4]:
# Build the misclassification table for the worst model: after a descending
# sort on 'test_accuracy', the last row is the lowest-accuracy one.
# NOTE(review): sort_values places NaN accuracies last, so a row with a missing
# 'test_accuracy' would be picked here -- confirm the column has no NaNs.
N_CLASSES = 46  # class labels are taken to be the integers 0..45
worst_row = Gen2_phenotypes.sort_values('test_accuracy', ascending=False).iloc[-1]
misclassified_df = pd.DataFrame.from_dict(worst_row['misclassed'])

# Count every (predicted, true) pair in one pass instead of re-filtering the
# frame once per pair. Pairs that never occur are filled with 0, and labels
# outside 0..N_CLASSES-1 are dropped by the reindex.
class_labels = range(N_CLASSES)
misclass_heat_df = pd.crosstab(
    misclassified_df['pred_class'], misclassified_df['true_class']
).reindex(index=class_labels, columns=class_labels, fill_value=0)

# column headers refer to true classes (string labels '0'..'45')
# row indices refer to predicted classes (0..45)
col_order = [str(label) for label in class_labels]
misclass_heat_df.index = pd.RangeIndex(N_CLASSES)
misclass_heat_df.columns = col_order

# Same counts as {true class label: [count per predicted class]}; kept so any
# later cell that reads mistakes_dict keeps working.
mistakes_dict = misclass_heat_df.to_dict(orient='list')

# Make a heatmap of misclassified classes.
sns.set(font_scale=3.0)
# Draw on an explicitly created figure so this plot never lands on axes left
# over from an earlier plot. The figure is resized only after drawing, exactly
# as before, so the tick layout chosen by seaborn is unchanged.
fig, ax = plt.subplots()
sns.heatmap(misclass_heat_df, linewidths=.1, ax=ax)

# True classes are labelled along the top edge, predicted classes on the left.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
ax.set_xlabel('True Classes', labelpad=20)
ax.xaxis.label.set_fontsize(28)
ax.set_ylabel('Predicted Classes', labelpad=10)
ax.yaxis.label.set_fontsize(28)

txt = '''Misclassification Counts'''
fig.set_size_inches(20, 15)
fig.text(0.3, .1, txt)  # caption placed in figure coordinates

# Shrink the tick labels; font_scale=3.0 makes them too large for 46 classes.
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontsize(12)

fig.savefig('Gen2_worst_accuracy_misclass_heatmap.png')
In [5]:
# Show the (rows, columns) shape of misclassified_df as most recently assigned;
# when cells run in order, that is the frame built from the last row of the
# descending 'test_accuracy' sort.
misclassified_df.shape
Out[5]: